!! Copyright (C) 2009,2010,2011,2012  Marco Restelli
!!
!! This file is part of:
!!   FEMilaro -- Finite Element Method toolkit
!!
!! FEMilaro is free software; you can redistribute it and/or modify it
!! under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 3 of the License, or
!! (at your option) any later version.
!!
!! FEMilaro is distributed in the hope that it will be useful, but
!! WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
!! General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with FEMilaro; If not, see <http://www.gnu.org/licenses/>.
!!
!! author: Marco Restelli                   <marco.restelli@gmail.com>

!>\brief
!!
!! Collection of multistep time integrators
!!
!! \n
!!
!! The methods implemented in this module are aimed at solving the ODE
!! problem
!! \f{displaymath}{
!!  u' = f(t,u).
!! \f}
!!
!! A generic \f$p+1\f$-step method has the form
!! \f{displaymath}{
!!  u_{n+1} = \sum_{j=0}^p a_ju_{n-j} + \Delta t
!!  \sum_{j=-1}^pb_jf_{n-j}.
!! \f}
!! The method is of order \f$q\f$ if it satisfies the following
!! compatibility conditions:
!! \f{displaymath}{
!!  \sum_{j=0}^p (-j)^ia_j + i \sum_{j=-1}^p(-j)^{i-1}b_j = 1, \qquad
!!  i=0,\ldots,q.
!! \f}
!!
!! The initialization of a multistep method must be coherent with the
!! order of the method. In particular, to initialize a method of order
!! \f$q\f$, we have to ensure an error \f$\mathcal{O}( \Delta t^q )\f$
!! on each starting value. Typically, this implies the use of a method
!! of order \f$q-1\f$.
!!
!! \warning Due to the initialization error, the numerical solution of
!! the test problem
!! \f{displaymath}{
!!   u' = t^{q-1}
!! \f}
!! will not be exact. However, the error should remain constant during
!! the integration.
!! 
!! \note The initialization subroutines of the multistep methods also
!! perform the first \f$p\f$ steps; this must be taken into account to
!! keep track of the sistem time.
!!
!! \section BDF BDF schemes
!!
!! BDF schemes are characterized by the fact that \f$b_{-1}\neq0\f$
!! while \f$b_{j}=0\f$ for \f$j\geq0\f$. This implies that the right
!! hand side \f$f\f$ of the problem is never computed explicitly, but
!! only as part of the solution of the implicit problem
!! \f{displaymath}{
!!   u - \Delta t\, b_{-1}\, f(t,u) = b.
!! \f}
!! In practical terms, this means that the member function \c rhs of
!! the class \c c_ode is not used by BDF methods. With this respect,
!! however, there are two issues that deserve attention:
!! <ul>
!!  <li> the computation of the linearization state \f$x_l\f$ in
!!  principle could make explicit use of the right hand side of the
!!  equation \f$f\f$; in the present module, all the BDF schemes
!!  compute \f$x_l\f$ with an extrapolation of the last time steps,
!!  without making explicit use of the right hand side;
!!  <li> the initialization steps in principle could make explicit use
!!  of the right hand side of the equation; again, all the BDF schemes
!!  implemented in this module use BDF type schemes of lower order as
!!  initialization schemes.
!! </ul>
!!
!! \section constrained Constrained problems
!!
!! Some of the solvers defined in the present module can also be used
!! to integrate problems of the form
!! \f{displaymath}{
!!  \begin{array}{rcl}
!!   u' & = & f(t,u,p) \\
!!   g(t,u) & = & 0
!!  \end{array}
!! \f}
!! fore some constraint function \f$g\f$ and associated Lagrange
!! multiplier \f$p\f$. Because of the constraint, an
!! implicit method will be required.
!!
!! Using BDF for constrained problems is straightforward, essentially
!! because such solvers do not make any use of the member function \c
!! rhs of the ODE class \c c_ode. In fact, the only required
!! modification is implementing the \c solve member function so that
!! it solves
!! \f{displaymath}{
!!  \begin{array}{rcl}
!!   u - \sigma f(t,u,p) & = & b_u \\
!!   g(t,u) & = & 0
!!  \end{array}
!! \f}
!! for arbitrary \f$\sigma\f$ and \f$b_u\f$, where \f$b_u\f$ denotes
!! the components of \f$b\f$ corresponding to the prognostic equations
!! for \f$u\f$ (the remaining components of the right hand side
!! \f$b\f$ can be ignored in \c solve).
!<----------------------------------------------------------------------
module mod_multistep

!-----------------------------------------------------------------------

 use mod_messages, only: &
   mod_messages_initialized, &
   error,   &
   warning, &
   info

 use mod_kinds, only: &
   mod_kinds_initialized, &
   wp

 use mod_state_vars, only: &
   mod_state_vars_initialized, &
   c_stv

 use mod_time_integrators_base, only: &
   mod_time_integrators_base_initialized, &
   c_ode, c_ods, c_tint, t_ti_init_data, t_ti_step_diag

!-----------------------------------------------------------------------
 
 implicit none

!-----------------------------------------------------------------------

! Module interface

 public :: &
   mod_multistep_constructor, &
   mod_multistep_destructor,  &
   mod_multistep_initialized, &
   t_thetam, t_bdf1, t_bdf2, t_bdf3, t_bdf2ex

 private

!-----------------------------------------------------------------------

! Module types

 !> \f$\theta\f$-method
 !!
 !! The method is
 !! \f{displaymath}{
 !!  u_{n+1} - \Delta t \, \theta f(u_{n+1}) = u_n + \Delta t \,
 !!  (1-\theta) f(u_{n}).
 !! \f}
 !!
 !! In practice, we allow for an approximation (typically, a
 !! linearization) of the implicit term, so that the method becomes
 !! \f{displaymath}{
 !!  u_{n+1} - \Delta t \, \theta \tilde{f}_{x_l}(u_{n+1}) = u_n +
 !!  \Delta t \, (1-\theta) f(u_{n}).
 !! \f}
 !! Concerning the state used for the linearization \f$x_l\f$, we
 !! notice that, in order to obtain a second order method when
 !! \f$\theta=1/2\f$, we need at least a second order approximation of
 !! \f$x_{n+1}\f$. In fact, making the substitution
 !! \f{displaymath}{
 !!  f(t_{n+1},u_{n+1}) \to \tilde{f} + \Delta f,
 !! \f}
 !! we obtain the local trucation error
 !! \f{displaymath}{
 !!  \begin{array}{rcl}
 !!   \displaystyle
 !!   \Delta t \, \tau(\Delta t) & = &
 !!   \displaystyle
 !!   y_{n+1}-y_n-\frac{\Delta t}{2}\left[ f(t_{n+1},y_{n+1}) - \Delta
 !!   f +f(t_{n},y_{n}) \right] \\[3mm]
 !!   & = &
 !!   \displaystyle
 !!   \frac{\Delta t}{2}\Delta f + \mathcal{O}(\Delta t^3).
 !!  \end{array}
 !! \f}
 !! For \f$f\f$ smooth, a second order approximation \f$x_l\f$ of
 !! \f$y_{n+1}\f$ guarantees \f$\Delta f=\mathcal{O}(\Delta t^2)\f$,
 !! preserving the order of the scheme.
 !!
 !! \section thetam_setup Initialization
 !!
 !! The parameter \f$\theta\f$ is taken from
 !! <tt>init_data\%ir1(1)</tt>; default value \f$\theta=0.5\f$.
 type, extends(c_tint) :: t_thetam
  real(wp), private :: theta = 0.5_wp
 contains
  procedure, pass(ti) :: init  => thetam_init
  procedure, pass(ti) :: step  => thetam_step
  procedure, pass(ti) :: clean => thetam_clean
 end type t_thetam

 !> BDF1
 !!
 !! This is in practice the implicit Euler method
 !! \f{displaymath}{
 !!  u_{n+1} - \Delta t\, f(u_{n+1}) = u_n.
 !! \f}
 !! This method differs from the \f$\theta\f$-method with
 !! \f$\theta=1\f$ because we use here \f$x_l=u_n\f$, i.e. there is
 !! extrapolation for the linearization state.
 type, extends(c_tint) :: t_bdf1
 contains
  procedure, pass(ti) :: init  => bdf1_init
  procedure, pass(ti) :: step  => bdf1_step
  procedure, pass(ti) :: clean => bdf1_clean
 end type t_bdf1

 !> BDF2
 !!
 !! The method is
 !! \f{displaymath}{
 !!  u_{n+1} - \Delta t \, b_{-1}f(u_{n+1}) = a_0u_n + a_1u_{n-1}
 !! \f}
 !! with \f$a_0=4/3\f$, \f$a_1=-1/3\f$ and \f$b_{-1}=2/3\f$.
 !!
 !! In practice, we allow for an approximation (typically, a
 !! linearization) of the implicit term, so that the method becomes
 !! \f{displaymath}{
 !!  u_{n+1} - \Delta t \, b_{-1}\tilde{f}_{x_l}(u_{n+1}) = a_0u_n +
 !!  a_1u_{n-1}.
 !! \f}
 !! To ensure second order accuracy, the linearization state \f$x_l\f$
 !! is extrapolated using the last two time steps:
 !! \f{displaymath}{
 !!  x_l = 2u_n - u_{n-1}.
 !! \f}
 type, extends(c_tint) :: t_bdf2
 contains
  procedure, pass(ti) :: init  => bdf2_init
  procedure, pass(ti) :: step  => bdf2_step
  procedure, pass(ti) :: clean => bdf2_clean
 end type t_bdf2

 !> BDF3 initialization
 !!
 !! The method is
 !! \f{displaymath}{
 !!  u_{n+1} - \Delta t \, b_{-1}f(u_{n+1}) = a_0u_n + a_1u_{n-1} +
 !!  a_2u_{n-2}
 !! \f}
 !! with \f$a_0=18/11\f$, \f$a_1=-9/11\f$, \f$a_2=2/11\f$, and
 !! \f$b_{-1}=6/11\f$.
 !!
 !! To ensure third order accuracy, the linearization state \f$x_l\f$
 !! is extrapolated using the last three time steps:
 !! \f{displaymath}{
 !!  x_l = 3u_n - 3u_{n-1} + u_{n-2}.
 !! \f}
 type, extends(c_tint) :: t_bdf3
 contains
  procedure, pass(ti) :: init  => bdf3_init
  procedure, pass(ti) :: step  => bdf3_step
  procedure, pass(ti) :: clean => bdf3_clean
 end type t_bdf3

 !> BDF2ex
 !!
 !! The method is similar to BDF2, except that we use an extrapolation
 !! for the right hand side at time \f$n+1\f$ so that the resulting
 !! method is explicit. In practice, we have
 !! \f{displaymath}{
 !!  u_{n+1} = a_0u_n + a_1u_{n-1} + \Delta t \left[ \, b_0 f(u_n) +
 !!  b_1 f(u_{n-1}) \right],
 !! \f}
 !! with \f$a_0=4/3\f$, \f$a_1=-1/3\f$, \f$b_{0}=4/3\f$ and
 !! \f$b_{1}=-2/3\f$.
 type, extends(c_tint) :: t_bdf2ex
 contains
  procedure, pass(ti) :: init  => bdf2ex_init
  procedure, pass(ti) :: step  => bdf2ex_step
  procedure, pass(ti) :: clean => bdf2ex_clean
 end type t_bdf2ex

! Module variables

 ! public members
 logical, protected ::               &
   mod_multistep_initialized = .false.

 ! private members

 character(len=*), parameter :: &
   this_mod_name = 'mod_multistep'

!-----------------------------------------------------------------------

contains

!-----------------------------------------------------------------------

 subroutine mod_multistep_constructor()
  character(len=*), parameter :: &
    this_sub_name = 'constructor'

   !Consistency checks ---------------------------
   if( (mod_messages_initialized             .eqv..false.) .or. &
       (mod_kinds_initialized                .eqv..false.) .or. &
       (mod_state_vars_initialized           .eqv..false.) .or. &
       (mod_time_integrators_base_initialized.eqv..false.) ) then
     call error(this_sub_name,this_mod_name, &
                'Not all the required modules are initialized.')
   endif
   if(mod_multistep_initialized.eqv..true.) then
     call warning(this_sub_name,this_mod_name, &
                  'Module is already initialized.')
   endif
   !----------------------------------------------

   mod_multistep_initialized = .true.
 end subroutine mod_multistep_constructor

!-----------------------------------------------------------------------
 
 subroutine mod_multistep_destructor()
  character(len=*), parameter :: &
    this_sub_name = 'destructor'
   
   !Consistency checks ---------------------------
   if(mod_multistep_initialized.eqv..false.) then
     call error(this_sub_name,this_mod_name, &
                'This module is not initialized.')
   endif
   !----------------------------------------------

   mod_multistep_initialized = .false.
 end subroutine mod_multistep_destructor

!-----------------------------------------------------------------------
 
 !> \f$\theta\f$-method initialization
 subroutine thetam_init(ti,ode,dt,t,u0,ods,init_data,step_diag)
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: dt, t
  class(c_stv),  intent(inout) :: u0
  class(c_ods),  intent(inout) :: ods
  class(t_thetam), intent(out) :: ti
  type(t_ti_init_data), optional, intent(in)  :: init_data
  type(t_ti_step_diag), optional, intent(out) :: step_diag
 
  character(len=*), parameter :: &
    this_sub_name = 'thetam_init'

   ti%dt = dt

   if(present(init_data)) then
     if(allocated(init_data%ir1)) ti%theta = init_data%ir1(1)
   endif

   ! TM needs 3 work arrays
   !allocate( ti%work(3) , source=u )
   call u0%source_vect(ti%work,3)
 
 end subroutine thetam_init
 
!-----------------------------------------------------------------------
 
 !> \f$\theta\f$-method step
 subroutine thetam_step(unew,ti,ode,t,unow,ods,step_diag)
  class(t_thetam), intent(inout) :: ti
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: t
  class(c_stv),  intent(in)    :: unow
  class(c_ods),  intent(inout) :: ods
  class(c_stv),  intent(inout) :: unew
  type(t_ti_step_diag), optional, intent(inout) :: step_diag
 
  real(wp) :: dt, th
  character(len=*), parameter :: &
    this_sub_name = 'thetam_step'

   dt = ti%dt
   th = ti%theta

   ! compute the tendency
   call ode%rhs(ti%work(1),t,unow,ods)
   ! compute the rhs
   call ti%work(2)%alt( unow , (1.0_wp-th)*dt , ti%work(1) )
   ! compute the linearization state by explicit Euler
   call ti%work(3)%alt( unow ,      dt        , ti%work(1) )
   ! solve the implicit problem
   call ode%solve( unew , t+dt,th*dt , ti%work(2) , ti%work(3) , ods )
 
 end subroutine thetam_step
 
!-----------------------------------------------------------------------
 
 !> \f$\theta\f$-method clean-up
 pure subroutine thetam_clean(ti,ode,step_diag)
  class(c_ode),  intent(in)    :: ode
  class(t_thetam), intent(inout) :: ti
  type(t_ti_step_diag), optional, intent(inout) :: step_diag

  character(len=*), parameter :: &
    this_sub_name = 'thetam_clean'

  deallocate(ti%work)
 
 end subroutine thetam_clean
 
!-----------------------------------------------------------------------

 !> BDF1 initialization
 subroutine bdf1_init(ti,ode,dt,t,u0,ods,init_data,step_diag)
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: dt, t
  class(c_stv),  intent(inout) :: u0
  class(c_ods),  intent(inout) :: ods
  class(t_bdf1), intent(out)   :: ti
  type(t_ti_init_data), optional, intent(in)  :: init_data
  type(t_ti_step_diag), optional, intent(out) :: step_diag
 
  character(len=*), parameter :: &
    this_sub_name = 'bdf1_init'

   ti%dt = dt

   ! BDF1 needs 3 work arrays
   !allocate( ti%work(3) , source=u )
   call u0%source_vect(ti%work,3)

 end subroutine bdf1_init
 
!-----------------------------------------------------------------------
 
 !> BDF1 step
 subroutine bdf1_step(unew,ti,ode,t,unow,ods,step_diag)
  class(t_bdf1), intent(inout) :: ti
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: t
  class(c_stv),  intent(in)    :: unow
  class(c_ods),  intent(inout) :: ods
  class(c_stv),  intent(inout) :: unew
  type(t_ti_step_diag), optional, intent(inout) :: step_diag
 
  character(len=*), parameter :: &
    this_sub_name = 'bdf1_step'

   call ode%solve( unew , t+ti%dt , ti%dt , unow , unow , ods )

 end subroutine bdf1_step
 
!-----------------------------------------------------------------------
 
 !> BDF1 clean-up
 pure subroutine bdf1_clean(ti,ode,step_diag)
  class(c_ode),  intent(in)    :: ode
  class(t_bdf1), intent(inout) :: ti
  type(t_ti_step_diag), optional, intent(inout) :: step_diag

  character(len=*), parameter :: &
    this_sub_name = 'bdf1_clean'

  deallocate(ti%work)
 
 end subroutine bdf1_clean
 
!-----------------------------------------------------------------------

 !> BDF2 initialization
 !!
 !! This subroutine performs the first time step using the implicit
 !! Euler method and advances u0 accordingly. The <tt>ode\%work</tt>
 !! array is allocated and filled so that it can be used for
 !! subsequent calls to bdf2_step: <tt>ode\%work(1)</tt> is used for
 !! the previous step and <tt>ode\%work(2)</tt> as work array.
 subroutine bdf2_init(ti,ode,dt,t,u0,ods,init_data,step_diag)
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: dt, t
  class(c_stv),  intent(inout) :: u0
  class(c_ods),  intent(inout) :: ods
  class(t_bdf2), intent(out)   :: ti
  type(t_ti_init_data), optional, intent(in)  :: init_data
  type(t_ti_step_diag), optional, intent(out) :: step_diag

  character(len=*), parameter :: &
    this_sub_name = 'bdf2_init'

   ti%dt = dt

   ! BDF2 needs two work arrays
   !allocate( ti%work(2) , source=u0 )
   call u0%source_vect(ti%work,2)

   ! set uold
   call ti%work(1)%copy(u0)

   ! compute the first time step (implicit Euler)
   call ode%solve( u0 , t+ti%dt,ti%dt , ti%work(1),ti%work(1),ods )

   ! diagnostics
   step_diag%bootstrap_steps = 1

 end subroutine bdf2_init
 
!-----------------------------------------------------------------------
 
 !> BDF2 step
 subroutine bdf2_step(unew,ti,ode,t,unow,ods,step_diag)
  class(t_bdf2), intent(inout) :: ti
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: t
  class(c_stv),  intent(in)    :: unow
  class(c_ods),  intent(inout) :: ods
  class(c_stv),  intent(inout) :: unew
  type(t_ti_step_diag), optional, intent(inout) :: step_diag
 
  real(wp), parameter :: &
    a0 =  4.0_wp/3.0_wp, &
    a1 = -1.0_wp/3.0_wp, &
    bm =  2.0_wp/3.0_wp
  real(wp) :: dt
  character(len=*), parameter :: &
    this_sub_name = 'bdf2_step'

   ! rhs of the implicit problem
   call ti%work(2)%mlt(a1,ti%work(1))
   call ti%work(2)%inlt(a0,unow)

   ! extrapolation (can be stored in work(1), not needed any more)
   call ti%work(1)%tims(-1.0_wp)
   call ti%work(1)%inlt(2.0_wp,unow)

   ! solve the implicit problem
   dt = ti%dt
   call ode%solve( unew , t+dt , bm*dt , ti%work(2) , ti%work(1),ods )

   ! update the multistep history
   call ti%work(1)%copy(unow)

 end subroutine bdf2_step
 
!-----------------------------------------------------------------------
 
 !> BDF2 clean-up
 pure subroutine bdf2_clean(ti,ode,step_diag)
  class(c_ode),  intent(in)    :: ode
  class(t_bdf2), intent(inout) :: ti
  type(t_ti_step_diag), optional, intent(inout) :: step_diag

  character(len=*), parameter :: &
    this_sub_name = 'bdf2_clean'

  deallocate(ti%work)
 
 end subroutine bdf2_clean
 
!-----------------------------------------------------------------------

 !> BDF3 initialization
 !!
 !! This subroutine performs the first two steps using the BDF2 method
 !! and copies \c unow in \c uold.
 subroutine bdf3_init(ti,ode,dt,t,u0,ods,init_data,step_diag)
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: dt, t
  class(c_stv),  intent(inout) :: u0
  class(c_ods),  intent(inout) :: ods
  class(t_bdf3), intent(out)   :: ti
  type(t_ti_init_data), optional, intent(in)  :: init_data
  type(t_ti_step_diag), optional, intent(out) :: step_diag
 
  real(wp), parameter :: &
    a0 =  4.0_wp/3.0_wp, &
    a1 = -1.0_wp/3.0_wp, &
    bm =  2.0_wp/3.0_wp
  real(wp) :: dt2
  class(c_stv), allocatable :: tmp
  character(len=*), parameter :: &
    this_sub_name = 'bdf3_init'

   ti%dt = dt

   ! BDF3 needs 3 work arrays
   !allocate( ti%work(3) , source=u0 )
   call u0%source_vect(ti%work,3)
   ! this subroutine needs one more temporary
   call u0%source(tmp)

   ! set u_{n-2} (implicit in the source, but repeated for clarity)
   call ti%work(1)%copy(u0)

   ! leap-frog type step for the first step: set uold(1)
   !
   ! We solve the problem
   !     (u1-u0)/dt = f((u1+u0)/2)
   ! To solve this problem we work with the unknown z = (u1+u0)/2, so
   ! that the problem reads
   !     z - dt/2*f(z) = u0
   ! To obtain a good linearization state we make one iteration.
   dt2 = dt/2.0_wp
   call tmp%copy(u0) ! initial guess for solve
   call ode%solve( tmp , t+dt2 , dt2 , u0 , u0 , ods )

   call ti%work(2)%copy(u0) ! initial guess for solve
   call ode%solve( ti%work(2) , t+dt2 , dt2 , u0 , tmp , ods )
   call ti%work(2)%tims(2.0_wp) ! compute u1 = 2*z - u0
   call ti%work(2)%inlt(-1.0_wp,ti%work(1))

   ! BDF2 step: set u0
   call tmp%copy(u0)
   call ti%work(3)%mlt( a1,ti%work(1))
   call ti%work(3)%inlt(a0,ti%work(2))     ! define rhs
   call        tmp%mlt(-1.0_wp,ti%work(1))
   call        tmp%inlt(2.0_wp,ti%work(2)) ! extraplation
   call ode%solve( u0 , t+2.0_wp*dt , bm*dt , ti%work(3) , tmp , ods )

   deallocate(tmp)

   ! diagnostics
   step_diag%bootstrap_steps = 2

 end subroutine bdf3_init
 
!-----------------------------------------------------------------------
 
 !> BDF3 step
 subroutine bdf3_step(unew,ti,ode,t,unow,ods,step_diag)
  class(t_bdf3), intent(inout) :: ti
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: t
  class(c_stv),  intent(in)    :: unow
  class(c_ods),  intent(inout) :: ods
  class(c_stv),  intent(inout) :: unew
  type(t_ti_step_diag), optional, intent(inout) :: step_diag
 
  real(wp), parameter :: &
    a0 =  18.0_wp/11.0_wp, &
    a1 = - 9.0_wp/11.0_wp, &
    a2 =   2.0_wp/11.0_wp, &
    bm =   6.0_wp/11.0_wp
  real(wp) :: dt
  character(len=*), parameter :: &
    this_sub_name = 'bdf3_step'

   ! rhs of the implicit problem
   call ti%work(3)%mlt(a0,unow)
   call ti%work(3)%inlv( (/a2,a1/) , ti%work(1:2) )

   ! extrapolation (can be stored in work(1), not needed any more)
   call ti%work(1)%inlt(-3.0_wp,ti%work(2))
   call ti%work(1)%inlt( 3.0_wp,unow )

   ! solve the implicit problem
   dt = ti%dt
   call ode%solve( unew , t+dt , bm*dt , ti%work(3) , ti%work(1),ods )

   ! update the multistep history
   call ti%work(1)%copy(ti%work(2))
   call ti%work(2)%copy(unow)

 end subroutine bdf3_step
 
!-----------------------------------------------------------------------
 
 !> BDF3 clean-up
 pure subroutine bdf3_clean(ti,ode,step_diag)
  class(c_ode),  intent(in)    :: ode
  class(t_bdf3), intent(inout) :: ti
  type(t_ti_step_diag), optional, intent(inout) :: step_diag

  character(len=*), parameter :: &
    this_sub_name = 'bdf3_clean'

  deallocate(ti%work)
 
 end subroutine bdf3_clean
 
!-----------------------------------------------------------------------

 !> BDF2ex initialization
 subroutine bdf2ex_init(ti,ode,dt,t,u0,ods,init_data,step_diag)
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: dt, t
  class(c_stv),  intent(inout) :: u0
  class(c_ods),  intent(inout) :: ods
  class(t_bdf2ex), intent(out) :: ti
  type(t_ti_init_data), optional, intent(in)  :: init_data
  type(t_ti_step_diag), optional, intent(out) :: step_diag
 
  character(len=*), parameter :: &
    this_sub_name = 'bdf2ex_init'

   ti%dt = dt

   ! BDF2 needs two work arrays
   !allocate( ti%work(2) , source=u0 )
   call u0%source_vect(ti%work,2)

   ! set uold
   call ti%work(1)%copy(u0)

   ! compute the first time step (explicit Euler)
   call ode%rhs(ti%work(2),t,u0,ods)
   call u0%inlt(ti%dt,ti%work(2))

   ! diagnostics
   step_diag%bootstrap_steps = 1

 end subroutine bdf2ex_init
 
!-----------------------------------------------------------------------
 
 !> BDF2ex step
 subroutine bdf2ex_step(unew,ti,ode,t,unow,ods,step_diag)
  class(t_bdf2ex), intent(inout) :: ti
  class(c_ode),  intent(in)    :: ode
  real(wp),      intent(in)    :: t
  class(c_stv),  intent(in)    :: unow
  class(c_ods),  intent(inout) :: ods
  class(c_stv),  intent(inout) :: unew
  type(t_ti_step_diag), optional, intent(inout) :: step_diag
 
  real(wp), parameter :: &
    a0 =  4.0_wp/3.0_wp, &
    a1 = -1.0_wp/3.0_wp, &
    b0 =  4.0_wp/3.0_wp, &
    b1 = -2.0_wp/3.0_wp
  character(len=*), parameter :: &
    this_sub_name = 'bdf2ex_step'

   ! compute the "a" terms -> work(1)
   call ti%work(1)%tims(a1)
   call ti%work(1)%inlt(a0,unow)

   ! compute the "b" terms -> unew
   call unew%mlt(b1,ti%work(2))
   call ode%rhs(ti%work(2),t,unow,ods) ! store for the next step
   call unew%inlt(b0,ti%work(2))
   call unew%tims(ti%dt)

   ! sum the terms
   call unew%incr(ti%work(1))

   ! update the multistep history
   call ti%work(1)%copy(unow)
   ! ti%work(2) already has the correct value

 end subroutine bdf2ex_step
 
!-----------------------------------------------------------------------
 
 !> BDF2ex clean-up
 pure subroutine bdf2ex_clean(ti,ode,step_diag)
  class(c_ode),  intent(in)    :: ode
  class(t_bdf2ex), intent(inout) :: ti
  type(t_ti_step_diag), optional, intent(inout) :: step_diag

  character(len=*), parameter :: &
    this_sub_name = 'bdf2ex_clean'

  deallocate(ti%work)
 
 end subroutine bdf2ex_clean
 
!-----------------------------------------------------------------------

end module mod_multistep
